Checking against ground-truth

Doing clustering is great for segmentation, but how reliable are the clusters?

Let’s run some quick k-means clustering on the classic iris dataset.

Unusually for clustering, in this instance we actually have some ‘ground truth labels’ (species) that we can see how well our clusters overlap with:

require(plotly); require(reshape2)

set.seed(99)

# Import data
data("iris")
iris$Species <- as.character(iris$Species)

# Define min-max
min_max_scale <- function(x) {
  return((x - min(x))/(max(x) - min(x)))
}

iris_raw <- iris[,-5]
iris_raw <- apply(iris_raw, 2, min_max_scale)

# First clustering
naive_kcluster <- stats::kmeans(x = iris_raw[,-5], centers = 3)

# Check performance
naive_tab <- table(iris$Species, naive_kcluster$cluster)
naive_tab <- dcast(data = data.frame(naive_tab), formula = Var2 ~ Var1, value.var = 'Freq')
names(naive_tab)[1] <- 'Cluster'
naive_tab$Performance <- round(apply(naive_tab[,2:4], 1, max)/apply(naive_tab[,2:4], 1, sum),3)
naive_tab[rev(order(naive_tab$Performance)),]

Using PCA to make higher-performant clusters

Clustering weights about each feature equally. But some features are highly correlated (so effectively ‘double-weighting’ in clustering)! We can see this when looking at a correlation matrix of the features:

data.frame(round(cor(iris[,-5]),3))

So to do this more reliably, we can:

We can look at how well the clusters overlap with the ‘ground truth’ labels to check this:

# PCA
iris_pca <- stats::prcomp(iris[,-5])
iris_pca$x <- apply(iris_pca$x, 2, min_max_scale)
weights <- iris_pca$sdev^2/max(iris_pca$sdev^2)

for(i in 1:length(weights)) {
  iris_pca$x[,i] <- iris_pca$x[,i] * weights[i]
}

# PCA clustering
pca_cluster <- stats::kmeans(x = iris_pca$x, centers = 3)

# Check performance
pca_tab <- table(iris$Species, pca_cluster$cluster)
pca_tab <- dcast(data = data.frame(pca_tab), formula = Var2 ~ Var1, value.var = 'Freq')
names(pca_tab)[1] <- 'Cluster'
pca_tab$Performance <- round(apply(pca_tab[,2:4], 1, max)/apply(pca_tab[,2:4], 1, sum),3)
pca_tab[rev(order(pca_tab$Performance)),]

# Convex hull of species for plotting
species_chull <- sapply(unique(iris$Species), FUN = function(i) {
  points <- grDevices::chull(x = iris_pca$x[iris$Species == i,1:2])
  return(c(points,points[1]))
})

plot_ly(type = 'scatter') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 1,1], y = iris_pca$x[pca_cluster$cluster == 1,2], name = 'Cluster 1') %>%
  add_trace(x = iris_pca$x[iris$Species == unique(iris$Species)[1],1][species_chull[[1]]], 
            y = iris_pca$x[iris$Species == unique(iris$Species)[1],2][species_chull[[1]]], mode = 'lines', name = unique(iris$Species)[1]) %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 2,1], y = iris_pca$x[pca_cluster$cluster == 2,2], name = 'Cluster 2') %>%
  add_trace(x = iris_pca$x[iris$Species == unique(iris$Species)[2],1][species_chull[[2]]], 
            y = iris_pca$x[iris$Species == unique(iris$Species)[2],2][species_chull[[2]]], mode = 'lines', name = unique(iris$Species)[2]) %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 3,1], y = iris_pca$x[pca_cluster$cluster == 3,2], name = 'Cluster 3') %>%
  add_trace(x = iris_pca$x[iris$Species == unique(iris$Species)[3],1][species_chull[[3]]], 
            y = iris_pca$x[iris$Species == unique(iris$Species)[3],2][species_chull[[3]]], mode = 'lines', name = unique(iris$Species)[3]) %>%
  layout(xaxis = list(title = 'PC1'), yaxis = list(title = 'PC2'))

Assessing performance in an unsupervised way

However, what would we do if the ground truth labels don’t exist? How would we know if the pca clustering is more highly performant?

Some measures look at cluster compactness within cluster and seperation between clusters. Well the far left cluster looks very compact and seperated from the other clusters, so is maybe more reliable. The other two clusters on the other hand have some overlap.

But although measuring this works for this clustering scenario (e.g Dunn Index), for other clustering shapes this is less reliable!

So how can we more reliably measure this? Simualations:


random_samples <- lapply(1:10000, FUN = function(x) sample(1:nrow(iris_pca$x), replace = T))

# Bootsrapped clustering
many_runs <- lapply(random_samples, FUN = function(i) {
  clusters <- stats::kmeans(x = iris_pca$x[i,], centers = 3)$cluster
  val <- rep(NA, length(i))
  val[i] <- clusters
  return(val)
  }
)

# Jaccard index 
first_run <- list();
cluster_map <- list();
for(i in unique(pca_cluster$cluster)) {
  first_run[[i]] <- lapply(many_runs, FUN = function(x) {
    tab <- table(pca_cluster$cluster == i, x)
    tab <- tab[2,]/apply(tab, 2, sum)
    return(tab)
    })
  cluster_map[[i]] <- sapply(first_run[[i]], FUN = function(x) which.max(x))
}

Overall, the pairs end up in the same original cluster 95% of the time, but there is variation between clusters:


df <- data.frame(tapply(pair_sameness, INDEX = pca_cluster$cluster, FUN = mean))
df$Cluster <- rownames(df)
rownames(df) <- NULL
df$Performance <- df[,1]

df[rev(order(df$Performance)),c('Cluster', 'Performance')]

We can capture the uncertainty of the edges of the clusters with a plot of the convex hull of cluster boundaries for observations that ended up in the same cluster > x% of the time:

all_data <- data.frame(iris_pca$x)
all_data$cluster <- pca_cluster$cluster

all_data <- all_data[pair_sameness  >= 0.6,]

perf_points <- sapply(1:3, FUN = function(i) {
  rel_points <- grDevices::chull(x = all_data[all_data$cluster == i,1:2])
  return(c(rel_points,rel_points[1]))
})

plot_ly(type = 'scatter') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 1,1], y = iris_pca$x[pca_cluster$cluster == 1,2], name = 'cluster 1') %>%
  add_trace(x = all_data$PC1[all_data$cluster == 1][perf_points[[1]]], y = all_data$PC2[all_data$cluster == 1][perf_points[[1]]], mode = 'lines', name = '>= 80% stable cluster boundary') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 2,1], y = iris_pca$x[pca_cluster$cluster == 2,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 2][perf_points[[2]]], y = all_data$PC2[all_data$cluster == 2][perf_points[[2]]], mode = 'lines') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 3,1], y = iris_pca$x[pca_cluster$cluster == 3,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 3][perf_points[[3]]], y = all_data$PC2[all_data$cluster == 3][perf_points[[3]]], mode = 'lines') %>%
  layout(showlegend = F, xaxis = list(title = 'PC1'), yaxis = list(title = 'PC2'), title = '60% pairness stability')
all_data <- data.frame(iris_pca$x)
all_data$cluster <- pca_cluster$cluster

all_data <- all_data[pair_sameness >= 0.8,]

perf_points <- sapply(1:3, FUN = function(i) {
  rel_points <- grDevices::chull(x = all_data[all_data$cluster == i,1:2])
  return(c(rel_points,rel_points[1]))
})

plot_ly(type = 'scatter') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 1,1], y = iris_pca$x[pca_cluster$cluster == 1,2], name = 'cluster 1') %>%
  add_trace(x = all_data$PC1[all_data$cluster == 1][perf_points[[1]]], y = all_data$PC2[all_data$cluster == 1][perf_points[[1]]], mode = 'lines', name = '>= 80% stable cluster boundary') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 2,1], y = iris_pca$x[pca_cluster$cluster == 2,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 2][perf_points[[2]]], y = all_data$PC2[all_data$cluster == 2][perf_points[[2]]], mode = 'lines') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 3,1], y = iris_pca$x[pca_cluster$cluster == 3,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 3][perf_points[[3]]], y = all_data$PC2[all_data$cluster == 3][perf_points[[3]]], mode = 'lines') %>%
  layout(showlegend = F, xaxis = list(title = 'PC1'), yaxis = list(title = 'PC2'), title = '80% pairness stability')
all_data <- data.frame(iris_pca$x)
all_data$cluster <- pca_cluster$cluster

all_data <- all_data[pair_sameness  >= 0.95,]

perf_points <- sapply(1:3, FUN = function(i) {
  rel_points <- grDevices::chull(x = all_data[all_data$cluster == i,1:2])
  return(c(rel_points,rel_points[1]))
})

plot_ly(type = 'scatter') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 1,1], y = iris_pca$x[pca_cluster$cluster == 1,2], name = 'cluster 1') %>%
  add_trace(x = all_data$PC1[all_data$cluster == 1][perf_points[[1]]], y = all_data$PC2[all_data$cluster == 1][perf_points[[1]]], mode = 'lines', name = '>= 80% stable cluster boundary') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 2,1], y = iris_pca$x[pca_cluster$cluster == 2,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 2][perf_points[[2]]], y = all_data$PC2[all_data$cluster == 2][perf_points[[2]]], mode = 'lines') %>%
  add_trace(x = iris_pca$x[pca_cluster$cluster == 3,1], y = iris_pca$x[pca_cluster$cluster == 3,2]) %>%
  add_trace(x = all_data$PC1[all_data$cluster == 3][perf_points[[3]]], y = all_data$PC2[all_data$cluster == 3][perf_points[[3]]], mode = 'lines') %>%
  layout(showlegend = F, xaxis = list(title = 'PC1'), yaxis = list(title = 'PC2'), title = '95% pairness stability')
LS0tDQp0aXRsZTogIkNsdXN0ZXIgU3RhYmlsaXR5Ig0Kb3V0cHV0OiANCiAgICBodG1sX25vdGVib29rOg0KICAgICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgICBzbWFydDogZmFsc2UNCiAgICBodG1sX2RvY3VtZW50OiBkZWZhdWx0DQogICAgcGRmX2RvY3VtZW50OiBkZWZhdWx0DQotLS0NCg0KIyMjIENoZWNraW5nIGFnYWluc3QgZ3JvdW5kLXRydXRoDQoNCkRvaW5nIGNsdXN0ZXJpbmcgaXMgZ3JlYXQgZm9yIHNlZ21lbnRhdGlvbiwgYnV0IGhvdyByZWxpYWJsZSBhcmUgdGhlIGNsdXN0ZXJzPw0KDQpMZXQncyBydW4gc29tZSBxdWljayBrLW1lYW5zIGNsdXN0ZXJpbmcgb24gdGhlIGNsYXNzaWMgaXJpcyBkYXRhc2V0Lg0KDQpVbnVzdWFsbHkgZm9yIGNsdXN0ZXJpbmcsIGluIHRoaXMgaW5zdGFuY2Ugd2UgYWN0dWFsbHkgaGF2ZSBzb21lICdncm91bmQgdHJ1dGggbGFiZWxzJyAoc3BlY2llcykgdGhhdCB3ZSBjYW4gc2VlIGhvdyB3ZWxsIG91ciBjbHVzdGVycyBvdmVybGFwIHdpdGg6DQoNCmBgYHtyIG5haXZlX3Bsb3QsIG1lc3NhZ2U9Rix3YXJuaW5nPUYsZmlnLndpZHRoPTksIGZpZy5oZWlnaHQ9Nn0NCnJlcXVpcmUocGxvdGx5KTsgcmVxdWlyZShyZXNoYXBlMikNCg0Kc2V0LnNlZWQoOTkpDQoNCiMgSW1wb3J0IGRhdGENCmRhdGEoImlyaXMiKQ0KaXJpcyRTcGVjaWVzIDwtIGFzLmNoYXJhY3RlcihpcmlzJFNwZWNpZXMpDQoNCiMgRGVmaW5lIG1pbi1tYXgNCm1pbl9tYXhfc2NhbGUgPC0gZnVuY3Rpb24oeCkgew0KICByZXR1cm4oKHggLSBtaW4oeCkpLyhtYXgoeCkgLSBtaW4oeCkpKQ0KfQ0KDQppcmlzX3JhdyA8LSBpcmlzWywtNV0NCmlyaXNfcmF3IDwtIGFwcGx5KGlyaXNfcmF3LCAyLCBtaW5fbWF4X3NjYWxlKQ0KDQojIEZpcnN0IGNsdXN0ZXJpbmcNCm5haXZlX2tjbHVzdGVyIDwtIHN0YXRzOjprbWVhbnMoeCA9IGlyaXNfcmF3WywtNV0sIGNlbnRlcnMgPSAzKQ0KDQojIENoZWNrIHBlcmZvcm1hbmNlDQpuYWl2ZV90YWIgPC0gdGFibGUoaXJpcyRTcGVjaWVzLCBuYWl2ZV9rY2x1c3RlciRjbHVzdGVyKQ0KbmFpdmVfdGFiIDwtIGRjYXN0KGRhdGEgPSBkYXRhLmZyYW1lKG5haXZlX3RhYiksIGZvcm11bGEgPSBWYXIyIH4gVmFyMSwgdmFsdWUudmFyID0gJ0ZyZXEnKQ0KbmFtZXMobmFpdmVfdGFiKVsxXSA8LSAnQ2x1c3RlcicNCm5haXZlX3RhYiRQZXJmb3JtYW5jZSA8LSByb3VuZChhcHBseShuYWl2ZV90YWJbLDI6NF0sIDEsIG1heCkvYXBwbHkobmFpdmVfdGFiWywyOjRdLCAxLCBzdW0pLDMpDQpuYWl2ZV90YWJbcmV2KG9yZGVyKG5haXZlX3RhYiRQZXJmb3JtYW5jZSkpLF0NCmBgYA0KDQojIyMgVXNpbmcgUENBIHRvIG1ha2UgaGlnaGVyLXBlcmZvcm1hbnQgY2x1c3RlcnMNCg0KQ2x1c3RlcmluZyB3ZWlnaHRzIGFib3V0IGVhY2ggZmVhdHVyZSBlcXVhbGx5LiBCdXQgc29tZSBmZWF0dXJlcyBhcmUgaGlnaGx5IGNvcnJlbGF0ZWQgKHNvIGVmZmVjdGl2ZWx5ICdkb3VibGUtd2VpZ2h0aW5nJyBpbiBjbHVzdGVyaW5nKSENCldlIGNhbiBzZWUgdGhpcyB3aGVuIGxvb2tpbmcgYXQgYSBjb3JyZWxhdGlvbiBtYXRyaXggb2YgdGhlIGZlYXR1cmVzOg0KDQpgYGB7ciBjb3JyX3Jhd30NCmRhdGEuZnJhbWUocm91bmQoY29yKGlyaXNbLC01XSksMykpDQpgYGANCg0KU28gdG8gZG8gdGhpcyBtb3JlIHJlbGlhYmx5LCB3ZSBjYW46DQoNCiogVXNlIFBDQSB0byBtYWtlIGZlYXR1cmVzIG9ydGhvZ29uYWwgKHNvIHdlIGRvIG5vdCAnb3ZlcndlaWdodCcgYnkgY29ycmVsYXRlZCBmZWF0dXJlcykNCiogV2VpZ2h0IHRoZSBmZWF0dXJlcyBieSB0aGUgaW52ZXJzZSBvZiB0aGUgc2QgZXhwbGFpbmVkIGJ5IGVhY2ggY29tcG9uZW50IChhcyBub3QgYWxsIG9mIHRoZXNlIGNvbXBvbmVudHMgc2hvdWxkIGJlIHRyZWF0ZWQgZXF1YWxseSkNCg0KV2UgY2FuIGxvb2sgYXQgaG93IHdlbGwgdGhlIGNsdXN0ZXJzIG92ZXJsYXAgd2l0aCB0aGUgJ2dyb3VuZCB0cnV0aCcgbGFiZWxzIHRvIGNoZWNrIHRoaXM6DQoNCg0KYGBge3IgZmlyc3RfcGxvdCwgbWVzc2FnZT1GLHdhcm5pbmc9RixmaWcud2lkdGg9OSwgZmlnLmhlaWdodD02fQ0KIyBQQ0ENCmlyaXNfcGNhIDwtIHN0YXRzOjpwcmNvbXAoaXJpc1ssLTVdKQ0KaXJpc19wY2EkeCA8LSBhcHBseShpcmlzX3BjYSR4LCAyLCBtaW5fbWF4X3NjYWxlKQ0Kd2VpZ2h0cyA8LSBpcmlzX3BjYSRzZGV2XjIvbWF4KGlyaXNfcGNhJHNkZXZeMikNCg0KZm9yKGkgaW4gMTpsZW5ndGgod2VpZ2h0cykpIHsNCiAgaXJpc19wY2EkeFssaV0gPC0gaXJpc19wY2EkeFssaV0gKiB3ZWlnaHRzW2ldDQp9DQoNCiMgUENBIGNsdXN0ZXJpbmcNCnBjYV9jbHVzdGVyIDwtIHN0YXRzOjprbWVhbnMoeCA9IGlyaXNfcGNhJHgsIGNlbnRlcnMgPSAzKQ0KDQojIENoZWNrIHBlcmZvcm1hbmNlDQpwY2FfdGFiIDwtIHRhYmxlKGlyaXMkU3BlY2llcywgcGNhX2NsdXN0ZXIkY2x1c3RlcikNCnBjYV90YWIgPC0gZGNhc3QoZGF0YSA9IGRhdGEuZnJhbWUocGNhX3RhYiksIGZvcm11bGEgPSBWYXIyIH4gVmFyMSwgdmFsdWUudmFyID0gJ0ZyZXEnKQ0KbmFtZXMocGNhX3RhYilbMV0gPC0gJ0NsdXN0ZXInDQpwY2FfdGFiJFBlcmZvcm1hbmNlIDwtIHJvdW5kKGFwcGx5KHBjYV90YWJbLDI6NF0sIDEsIG1heCkvYXBwbHkocGNhX3RhYlssMjo0XSwgMSwgc3VtKSwzKQ0KcGNhX3RhYltyZXYob3JkZXIocGNhX3RhYiRQZXJmb3JtYW5jZSkpLF0NCg0KIyBDb252ZXggaHVsbCBvZiBzcGVjaWVzIGZvciBwbG90dGluZw0Kc3BlY2llc19jaHVsbCA8LSBzYXBwbHkodW5pcXVlKGlyaXMkU3BlY2llcyksIEZVTiA9IGZ1bmN0aW9uKGkpIHsNCiAgcG9pbnRzIDwtIGdyRGV2aWNlczo6Y2h1bGwoeCA9IGlyaXNfcGNhJHhbaXJpcyRTcGVjaWVzID09IGksMToyXSkNCiAgcmV0dXJuKGMocG9pbnRzLHBvaW50c1sxXSkpDQp9KQ0KDQpwbG90X2x5KHR5cGUgPSAnc2NhdHRlcicpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAxLDFdLCB5ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDEsMl0sIG5hbWUgPSAnQ2x1c3RlciAxJykgJT4lDQogIGFkZF90cmFjZSh4ID0gaXJpc19wY2EkeFtpcmlzJFNwZWNpZXMgPT0gdW5pcXVlKGlyaXMkU3BlY2llcylbMV0sMV1bc3BlY2llc19jaHVsbFtbMV1dXSwgDQogICAgICAgICAgICB5ID0gaXJpc19wY2EkeFtpcmlzJFNwZWNpZXMgPT0gdW5pcXVlKGlyaXMkU3BlY2llcylbMV0sMl1bc3BlY2llc19jaHVsbFtbMV1dXSwgbW9kZSA9ICdsaW5lcycsIG5hbWUgPSB1bmlxdWUoaXJpcyRTcGVjaWVzKVsxXSkgJT4lDQogIGFkZF90cmFjZSh4ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDIsMV0sIHkgPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMiwyXSwgbmFtZSA9ICdDbHVzdGVyIDInKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W2lyaXMkU3BlY2llcyA9PSB1bmlxdWUoaXJpcyRTcGVjaWVzKVsyXSwxXVtzcGVjaWVzX2NodWxsW1syXV1dLCANCiAgICAgICAgICAgIHkgPSBpcmlzX3BjYSR4W2lyaXMkU3BlY2llcyA9PSB1bmlxdWUoaXJpcyRTcGVjaWVzKVsyXSwyXVtzcGVjaWVzX2NodWxsW1syXV1dLCBtb2RlID0gJ2xpbmVzJywgbmFtZSA9IHVuaXF1ZShpcmlzJFNwZWNpZXMpWzJdKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMywxXSwgeSA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAzLDJdLCBuYW1lID0gJ0NsdXN0ZXIgMycpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbaXJpcyRTcGVjaWVzID09IHVuaXF1ZShpcmlzJFNwZWNpZXMpWzNdLDFdW3NwZWNpZXNfY2h1bGxbWzNdXV0sIA0KICAgICAgICAgICAgeSA9IGlyaXNfcGNhJHhbaXJpcyRTcGVjaWVzID09IHVuaXF1ZShpcmlzJFNwZWNpZXMpWzNdLDJdW3NwZWNpZXNfY2h1bGxbWzNdXV0sIG1vZGUgPSAnbGluZXMnLCBuYW1lID0gdW5pcXVlKGlyaXMkU3BlY2llcylbM10pICU+JQ0KICBsYXlvdXQoeGF4aXMgPSBsaXN0KHRpdGxlID0gJ1BDMScpLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAnUEMyJykpDQpgYGANCg0KIyMjIEFzc2Vzc2luZyBwZXJmb3JtYW5jZSBpbiBhbiB1bnN1cGVydmlzZWQgd2F5DQoNCkhvd2V2ZXIsIHdoYXQgd291bGQgd2UgZG8gaWYgdGhlIGdyb3VuZCB0cnV0aCBsYWJlbHMgZG9uJ3QgZXhpc3Q/IEhvdyB3b3VsZCB3ZSBrbm93IGlmIHRoZSBwY2EgY2x1c3RlcmluZyBpcyBtb3JlIGhpZ2hseSBwZXJmb3JtYW50Pw0KDQpTb21lIG1lYXN1cmVzIGxvb2sgYXQgY2x1c3RlciBjb21wYWN0bmVzcyB3aXRoaW4gY2x1c3RlciBhbmQgc2VwZXJhdGlvbiBiZXR3ZWVuIGNsdXN0ZXJzLiBXZWxsIHRoZSBmYXIgbGVmdCBjbHVzdGVyIGxvb2tzIHZlcnkgY29tcGFjdCBhbmQgc2VwZXJhdGVkIGZyb20gdGhlIG90aGVyIGNsdXN0ZXJzLCBzbyBpcyBtYXliZSBtb3JlIHJlbGlhYmxlLiBUaGUgb3RoZXIgdHdvIGNsdXN0ZXJzIG9uIHRoZSBvdGhlciBoYW5kIGhhdmUgc29tZSBvdmVybGFwLiANCg0KQnV0IGFsdGhvdWdoIG1lYXN1cmluZyB0aGlzIHdvcmtzIGZvciB0aGlzIGNsdXN0ZXJpbmcgc2NlbmFyaW8gKGUuZyBEdW5uIEluZGV4KSwgZm9yIG90aGVyIGNsdXN0ZXJpbmcgc2hhcGVzIHRoaXMgaXMgbGVzcyByZWxpYWJsZSENCg0KU28gaG93IGNhbiB3ZSBtb3JlIHJlbGlhYmx5IG1lYXN1cmUgdGhpcz8gU2ltdWFsYXRpb25zOg0KDQoqIFNhbXBsZSB0aGUgcGFpcnMgd2l0aCByZXBsYWNlbWVudCAoYm9vdHN0cmFwKSBhbmQgcmUtcnVuIHRoZSBjbHVzdGVyaW5nIGFsZ28gZWFjaCB0aW1lDQoqIENoZWNrIGhvdyBvZnRlbiB0aGUgY2x1c3RlciBhc3NpZ25lZCB0byB0aGUgc2FtcGxlZCBwYWlycyBlbmQgdXAgaW4gdGhlIHNhbWUgb3JpZ2luYWwgY2x1c3Rlcg0KDQpgYGB7ciBjaGVja19zdGFiaWxpdHksIG1lc3NhZ2U9Rix3YXJuaW5nPUYsZmlnLndpZHRoPTksIGZpZy5oZWlnaHQ9Nn0NCg0KcmFuZG9tX3NhbXBsZXMgPC0gbGFwcGx5KDE6MTAwMDAsIEZVTiA9IGZ1bmN0aW9uKHgpIHNhbXBsZSgxOm5yb3coaXJpc19wY2EkeCksIHJlcGxhY2UgPSBUKSkNCg0KIyBCb290c3JhcHBlZCBjbHVzdGVyaW5nDQptYW55X3J1bnMgPC0gbGFwcGx5KHJhbmRvbV9zYW1wbGVzLCBGVU4gPSBmdW5jdGlvbihpKSB7DQogIGNsdXN0ZXJzIDwtIHN0YXRzOjprbWVhbnMoeCA9IGlyaXNfcGNhJHhbaSxdLCBjZW50ZXJzID0gMykkY2x1c3Rlcg0KICB2YWwgPC0gcmVwKE5BLCBsZW5ndGgoaSkpDQogIHZhbFtpXSA8LSBjbHVzdGVycw0KICByZXR1cm4odmFsKQ0KICB9DQopDQoNCiMgSmFjY2FyZCBpbmRleCANCmZpcnN0X3J1biA8LSBsaXN0KCk7DQpjbHVzdGVyX21hcCA8LSBsaXN0KCk7DQpmb3IoaSBpbiB1bmlxdWUocGNhX2NsdXN0ZXIkY2x1c3RlcikpIHsNCiAgZmlyc3RfcnVuW1tpXV0gPC0gbGFwcGx5KG1hbnlfcnVucywgRlVOID0gZnVuY3Rpb24oeCkgew0KICAgIHRhYiA8LSB0YWJsZShwY2FfY2x1c3RlciRjbHVzdGVyID09IGksIHgpDQogICAgdGFiIDwtIHRhYlsyLF0vYXBwbHkodGFiLCAyLCBzdW0pDQogICAgcmV0dXJuKHRhYikNCiAgICB9KQ0KICBjbHVzdGVyX21hcFtbaV1dIDwtIHNhcHBseShmaXJzdF9ydW5bW2ldXSwgRlVOID0gZnVuY3Rpb24oeCkgd2hpY2gubWF4KHgpKQ0KfQ0KY2x1c3Rlcl9tYXAgPC0gZG8uY2FsbChyYmluZCwgY2x1c3Rlcl9tYXApDQoNCiMgUmVuYW1lIGNsdXN0ZXJzDQpyZW5hbWVfY2x1c3RlcnMgPC0gc2FwcGx5KDE6bGVuZ3RoKG1hbnlfcnVucyksIEZVTiA9IGZ1bmN0aW9uKGkpIHttYXRjaChtYW55X3J1bnNbW2ldXSwgY2x1c3Rlcl9tYXBbLGldKX0pDQoNCiMgQXNlc3MgaG93IG9mdGVuIHRoZSBwYWlycyBhcmUgaW4gdGhlIHNhbWUgcGxhY2UNCnBhaXJfc2FtZW5lc3MgPC0gYXBwbHkocmVuYW1lX2NsdXN0ZXJzLCAxLCBGVU4gPSBmdW5jdGlvbih4KSBtYXgodGFibGUoeCwgdXNlTkEgPSAnbm8nKSkpL2FwcGx5KHJlbmFtZV9jbHVzdGVycywgMSwgRlVOID0gZnVuY3Rpb24oeCkgc3VtKCFpcy5uYSh4KSkpDQoNCmBgYA0KDQpPdmVyYWxsLCB0aGUgcGFpcnMgZW5kIHVwIGluIHRoZSBzYW1lIG9yaWdpbmFsIGNsdXN0ZXIgKipgciBzY2FsZXM6OnBlcmNlbnQobWVhbihwYWlyX3NhbWVuZXNzKSlgKiogIG9mIHRoZSB0aW1lLCBidXQgdGhlcmUgaXMgdmFyaWF0aW9uIGJldHdlZW4gY2x1c3RlcnM6DQoNCmBgYHtyIHJlc3VsdHMsIG1lc3NhZ2U9Rix3YXJuaW5nPUYsZmlnLndpZHRoPTksIGZpZy5oZWlnaHQ9Nn0NCg0KZGYgPC0gZGF0YS5mcmFtZSh0YXBwbHkocGFpcl9zYW1lbmVzcywgSU5ERVggPSBwY2FfY2x1c3RlciRjbHVzdGVyLCBGVU4gPSBtZWFuKSkNCmRmJENsdXN0ZXIgPC0gcm93bmFtZXMoZGYpDQpyb3duYW1lcyhkZikgPC0gTlVMTA0KZGYkUGVyZm9ybWFuY2UgPC0gZGZbLDFdDQoNCmRmW3JldihvcmRlcihkZiRQZXJmb3JtYW5jZSkpLGMoJ0NsdXN0ZXInLCAnUGVyZm9ybWFuY2UnKV0NCmBgYA0KDQpXZSBjYW4gY2FwdHVyZSB0aGUgdW5jZXJ0YWludHkgb2YgdGhlIGVkZ2VzIG9mIHRoZSBjbHVzdGVycyB3aXRoIGEgcGxvdCBvZiB0aGUgY29udmV4IGh1bGwgb2YgY2x1c3RlciBib3VuZGFyaWVzIGZvciBvYnNlcnZhdGlvbnMgdGhhdCBlbmRlZCB1cCBpbiB0aGUgc2FtZSBjbHVzdGVyID4geCUgb2YgdGhlIHRpbWU6DQoNCmBgYHtyIHBsb3RfNjAsIG1lc3NhZ2U9Rix3YXJuaW5nPUYsZmlnLndpZHRoPTksIGZpZy5oZWlnaHQ9Nn0NCmFsbF9kYXRhIDwtIGRhdGEuZnJhbWUoaXJpc19wY2EkeCkNCmFsbF9kYXRhJGNsdXN0ZXIgPC0gcGNhX2NsdXN0ZXIkY2x1c3Rlcg0KDQphbGxfZGF0YSA8LSBhbGxfZGF0YVtwYWlyX3NhbWVuZXNzICA+PSAwLjYsXQ0KDQpwZXJmX3BvaW50cyA8LSBzYXBwbHkoMTozLCBGVU4gPSBmdW5jdGlvbihpKSB7DQogIHJlbF9wb2ludHMgPC0gZ3JEZXZpY2VzOjpjaHVsbCh4ID0gYWxsX2RhdGFbYWxsX2RhdGEkY2x1c3RlciA9PSBpLDE6Ml0pDQogIHJldHVybihjKHJlbF9wb2ludHMscmVsX3BvaW50c1sxXSkpDQp9KQ0KDQpwbG90X2x5KHR5cGUgPSAnc2NhdHRlcicpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAxLDFdLCB5ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDEsMl0sIG5hbWUgPSAnY2x1c3RlciAxJykgJT4lDQogIGFkZF90cmFjZSh4ID0gYWxsX2RhdGEkUEMxW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMV1bcGVyZl9wb2ludHNbWzFdXV0sIHkgPSBhbGxfZGF0YSRQQzJbYWxsX2RhdGEkY2x1c3RlciA9PSAxXVtwZXJmX3BvaW50c1tbMV1dXSwgbW9kZSA9ICdsaW5lcycsIG5hbWUgPSAnPj0gODAlIHN0YWJsZSBjbHVzdGVyIGJvdW5kYXJ5JykgJT4lDQogIGFkZF90cmFjZSh4ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDIsMV0sIHkgPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMiwyXSkgJT4lDQogIGFkZF90cmFjZSh4ID0gYWxsX2RhdGEkUEMxW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMl1bcGVyZl9wb2ludHNbWzJdXV0sIHkgPSBhbGxfZGF0YSRQQzJbYWxsX2RhdGEkY2x1c3RlciA9PSAyXVtwZXJmX3BvaW50c1tbMl1dXSwgbW9kZSA9ICdsaW5lcycpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAzLDFdLCB5ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDMsMl0pICU+JQ0KICBhZGRfdHJhY2UoeCA9IGFsbF9kYXRhJFBDMVthbGxfZGF0YSRjbHVzdGVyID09IDNdW3BlcmZfcG9pbnRzW1szXV1dLCB5ID0gYWxsX2RhdGEkUEMyW2FsbF9kYXRhJGNsdXN0ZXIgPT0gM11bcGVyZl9wb2ludHNbWzNdXV0sIG1vZGUgPSAnbGluZXMnKSAlPiUNCiAgbGF5b3V0KHNob3dsZWdlbmQgPSBGLCB4YXhpcyA9IGxpc3QodGl0bGUgPSAnUEMxJyksIHlheGlzID0gbGlzdCh0aXRsZSA9ICdQQzInKSwgdGl0bGUgPSAnNjAlIHBhaXJuZXNzIHN0YWJpbGl0eScpDQpgYGANCg0KYGBge3IgcGxvdF84MCwgbWVzc2FnZT1GLHdhcm5pbmc9RixmaWcud2lkdGg9OSwgZmlnLmhlaWdodD02fQ0KYWxsX2RhdGEgPC0gZGF0YS5mcmFtZShpcmlzX3BjYSR4KQ0KYWxsX2RhdGEkY2x1c3RlciA8LSBwY2FfY2x1c3RlciRjbHVzdGVyDQoNCmFsbF9kYXRhIDwtIGFsbF9kYXRhW3BhaXJfc2FtZW5lc3MgPj0gMC44LF0NCg0KcGVyZl9wb2ludHMgPC0gc2FwcGx5KDE6MywgRlVOID0gZnVuY3Rpb24oaSkgew0KICByZWxfcG9pbnRzIDwtIGdyRGV2aWNlczo6Y2h1bGwoeCA9IGFsbF9kYXRhW2FsbF9kYXRhJGNsdXN0ZXIgPT0gaSwxOjJdKQ0KICByZXR1cm4oYyhyZWxfcG9pbnRzLHJlbF9wb2ludHNbMV0pKQ0KfSkNCg0KcGxvdF9seSh0eXBlID0gJ3NjYXR0ZXInKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMSwxXSwgeSA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAxLDJdLCBuYW1lID0gJ2NsdXN0ZXIgMScpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGFsbF9kYXRhJFBDMVthbGxfZGF0YSRjbHVzdGVyID09IDFdW3BlcmZfcG9pbnRzW1sxXV1dLCB5ID0gYWxsX2RhdGEkUEMyW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMV1bcGVyZl9wb2ludHNbWzFdXV0sIG1vZGUgPSAnbGluZXMnLCBuYW1lID0gJz49IDgwJSBzdGFibGUgY2x1c3RlciBib3VuZGFyeScpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAyLDFdLCB5ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDIsMl0pICU+JQ0KICBhZGRfdHJhY2UoeCA9IGFsbF9kYXRhJFBDMVthbGxfZGF0YSRjbHVzdGVyID09IDJdW3BlcmZfcG9pbnRzW1syXV1dLCB5ID0gYWxsX2RhdGEkUEMyW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMl1bcGVyZl9wb2ludHNbWzJdXV0sIG1vZGUgPSAnbGluZXMnKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMywxXSwgeSA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAzLDJdKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBhbGxfZGF0YSRQQzFbYWxsX2RhdGEkY2x1c3RlciA9PSAzXVtwZXJmX3BvaW50c1tbM11dXSwgeSA9IGFsbF9kYXRhJFBDMlthbGxfZGF0YSRjbHVzdGVyID09IDNdW3BlcmZfcG9pbnRzW1szXV1dLCBtb2RlID0gJ2xpbmVzJykgJT4lDQogIGxheW91dChzaG93bGVnZW5kID0gRiwgeGF4aXMgPSBsaXN0KHRpdGxlID0gJ1BDMScpLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAnUEMyJyksIHRpdGxlID0gJzgwJSBwYWlybmVzcyBzdGFiaWxpdHknKQ0KYGBgDQoNCmBgYHtyIHBsb3RfOTAsIG1lc3NhZ2U9Rix3YXJuaW5nPUYsZmlnLndpZHRoPTksIGZpZy5oZWlnaHQ9Nn0NCmFsbF9kYXRhIDwtIGRhdGEuZnJhbWUoaXJpc19wY2EkeCkNCmFsbF9kYXRhJGNsdXN0ZXIgPC0gcGNhX2NsdXN0ZXIkY2x1c3Rlcg0KDQphbGxfZGF0YSA8LSBhbGxfZGF0YVtwYWlyX3NhbWVuZXNzICA+PSAwLjk1LF0NCg0KcGVyZl9wb2ludHMgPC0gc2FwcGx5KDE6MywgRlVOID0gZnVuY3Rpb24oaSkgew0KICByZWxfcG9pbnRzIDwtIGdyRGV2aWNlczo6Y2h1bGwoeCA9IGFsbF9kYXRhW2FsbF9kYXRhJGNsdXN0ZXIgPT0gaSwxOjJdKQ0KICByZXR1cm4oYyhyZWxfcG9pbnRzLHJlbF9wb2ludHNbMV0pKQ0KfSkNCg0KcGxvdF9seSh0eXBlID0gJ3NjYXR0ZXInKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMSwxXSwgeSA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAxLDJdLCBuYW1lID0gJ2NsdXN0ZXIgMScpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGFsbF9kYXRhJFBDMVthbGxfZGF0YSRjbHVzdGVyID09IDFdW3BlcmZfcG9pbnRzW1sxXV1dLCB5ID0gYWxsX2RhdGEkUEMyW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMV1bcGVyZl9wb2ludHNbWzFdXV0sIG1vZGUgPSAnbGluZXMnLCBuYW1lID0gJz49IDgwJSBzdGFibGUgY2x1c3RlciBib3VuZGFyeScpICU+JQ0KICBhZGRfdHJhY2UoeCA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAyLDFdLCB5ID0gaXJpc19wY2EkeFtwY2FfY2x1c3RlciRjbHVzdGVyID09IDIsMl0pICU+JQ0KICBhZGRfdHJhY2UoeCA9IGFsbF9kYXRhJFBDMVthbGxfZGF0YSRjbHVzdGVyID09IDJdW3BlcmZfcG9pbnRzW1syXV1dLCB5ID0gYWxsX2RhdGEkUEMyW2FsbF9kYXRhJGNsdXN0ZXIgPT0gMl1bcGVyZl9wb2ludHNbWzJdXV0sIG1vZGUgPSAnbGluZXMnKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBpcmlzX3BjYSR4W3BjYV9jbHVzdGVyJGNsdXN0ZXIgPT0gMywxXSwgeSA9IGlyaXNfcGNhJHhbcGNhX2NsdXN0ZXIkY2x1c3RlciA9PSAzLDJdKSAlPiUNCiAgYWRkX3RyYWNlKHggPSBhbGxfZGF0YSRQQzFbYWxsX2RhdGEkY2x1c3RlciA9PSAzXVtwZXJmX3BvaW50c1tbM11dXSwgeSA9IGFsbF9kYXRhJFBDMlthbGxfZGF0YSRjbHVzdGVyID09IDNdW3BlcmZfcG9pbnRzW1szXV1dLCBtb2RlID0gJ2xpbmVzJykgJT4lDQogIGxheW91dChzaG93bGVnZW5kID0gRiwgeGF4aXMgPSBsaXN0KHRpdGxlID0gJ1BDMScpLCB5YXhpcyA9IGxpc3QodGl0bGUgPSAnUEMyJyksIHRpdGxlID0gJzk1JSBwYWlybmVzcyBzdGFiaWxpdHknKQ0KYGBg